# ------------------------------------------------------------
# Purpose: Install/load required R packages, read config, and define paths & utility functions
# Usage:   Source at the start of any analysis session.
# ------------------------------------------------------------
# Install pacman if needed
if (!require("pacman")) install.packages("pacman", quiet = TRUE)
library(pacman)

# Load all required packages
p_load(
    tidyverse, data.table, tidytable,
    purrr, jsonlite, broom, fixest, xtable,
    binsreg, RStata, countrycode, ggthemes, 
    modelsummary, statar, rmapshaper,
    RColorBrewer, sf, plm, 
    stringi, phonics
)

# Read the config file.
# config.json must be in the working directory. It supplies `project_dir`
# (root of every path built later in this script) plus `stata_bin` and
# `stata_version`, which are handed to RStata through options().
if (!file.exists("config.json")) stop("config.json not found in working directory.")
config <- fromJSON("config.json")
project_dir <- config$project_dir
# Fail early: with a missing or empty project_dir, file.path() would silently
# return character(0) (or relative paths) for every folder defined from it.
if (!is.character(project_dir) || length(project_dir) != 1L ||
    is.na(project_dir) || !nzchar(project_dir)) {
    stop("config.json must define 'project_dir' as a single non-empty string.")
}
options("RStata.StataPath" = config$stata_bin)
options("RStata.StataVersion" = config$stata_version)

# Project folder layout; every entry is rooted at project_dir.
pcode <- file.path(project_dir, "code")

# data/ subfolders
pdataraw <- file.path(project_dir, "data", "raw")
pdataconfidential <- file.path(project_dir, "data", "confidential")
pdataconfinterim <- file.path(project_dir, "data", "conf_interim")
pdataanalysis <- file.path(project_dir, "data", "analysis")
pdataconfanalysis <- file.path(project_dir, "data", "conf_analysis")

# output/ and its appendix subfolder
poutput <- file.path(project_dir, "output")
poutputappendix <- file.path(poutput, "appendix")

# Echo the resolved paths for confirmation. All pieces go to a single cat()
# call, so its default " " separator is applied between every pair of pieces.
# local() keeps the temporaries out of the calling environment.
local({
    labelled_paths <- list(
        list("Code:           ", pcode),
        list("Raw:            ", pdataraw),
        list("Confidential:   ", pdataconfidential),
        list("Conf Interim:   ", pdataconfinterim),
        list("Analysis:       ", pdataanalysis),
        list("Conf Analysis:  ", pdataconfanalysis),
        list("Output:         ", poutput),
        list("Output Appendix:", poutputappendix)
    )
    # Flatten to: label, path, "\n", label, path, "\n", ...
    report_lines <- unlist(
        lapply(labelled_paths, function(entry) c(entry, list("\n"))),
        recursive = FALSE
    )
    do.call(cat, c(list("Paths set:\n"), report_lines))
})

# Utility functions
# Move estimate rows (each matching line plus the line right after it) to a
# new position inside a LaTeX table file, rewriting the file in place.
#
# Arguments:
#   tex_file            Path of the .tex file to edit.
#   origin_marker       Regular expression (grep) selecting the row(s) to
#                       move; the line following each match moves with it.
#   destination_marker  Regular expression (grep); the moved rows are inserted
#                       directly above its first match among the remaining
#                       lines.
#
# Returns NULL invisibly (the value of writeLines()). Stops with an explicit
# error, before anything is written, when a marker is not found or when a
# matched row is the last line of the file (no following line to move).
move_estimates_rows <- function(tex_file, origin_marker, destination_marker) {
    lines <- readLines(tex_file)

    const_idx <- grep(origin_marker, lines)
    if (length(const_idx) == 0L) {
        stop(
            "move_estimates_rows: no line matches origin marker '",
            origin_marker, "' in ", tex_file,
            call. = FALSE
        )
    }
    se_idx <- const_idx + 1L
    if (max(se_idx) > length(lines)) {
        stop(
            "move_estimates_rows: origin marker '", origin_marker,
            "' matches the last line of ", tex_file,
            ", so there is no following row to move with it",
            call. = FALSE
        )
    }

    # Keep file order (and drop duplicates) so each estimate stays directly
    # above its own follow-up line when several rows match.
    moved_idx <- sort(unique(c(const_idx, se_idx)))
    const_rows <- lines[moved_idx]
    lines <- lines[-moved_idx]

    # The destination is located after removal, so indices refer to the
    # shortened file.
    marker_idx <- grep(destination_marker, lines)
    if (length(marker_idx) == 0L) {
        stop(
            "move_estimates_rows: no line matches destination marker '",
            destination_marker, "' in ", tex_file,
            call. = FALSE
        )
    }

    lines <- append(lines, const_rows, after = marker_idx[1] - 1L)
    writeLines(lines, tex_file)
}
# Move the row(s) matching a marker to a new position inside a LaTeX table
# file, rewriting the file in place.
#
# Arguments:
#   tex_file            Path of the .tex file to edit.
#   origin_marker       Regular expression (grep) selecting the row(s) to move.
#   destination_marker  Regular expression (grep); the moved rows are inserted
#                       directly above its first match among the remaining
#                       lines.
#
# Returns NULL invisibly (the value of writeLines()). Stops with an explicit
# error, before anything is written, when either marker is not found.
move_table_row <- function(tex_file, origin_marker, destination_marker) {
    lines <- readLines(tex_file)

    idx <- grep(origin_marker, lines)
    if (length(idx) == 0L) {
        stop(
            "move_table_row: no line matches origin marker '",
            origin_marker, "' in ", tex_file,
            call. = FALSE
        )
    }
    row <- lines[idx]
    lines <- lines[-idx]

    # The destination is located after removal, so indices refer to the
    # shortened file.
    marker_idx <- grep(destination_marker, lines)
    if (length(marker_idx) == 0L) {
        stop(
            "move_table_row: no line matches destination marker '",
            destination_marker, "' in ", tex_file,
            call. = FALSE
        )
    }

    lines <- append(lines, row, after = marker_idx[1] - 1L)
    writeLines(lines, tex_file)
}
# Insert one or more rows next to the first line matching a marker in a LaTeX
# table file, rewriting the file in place.
#
# Arguments:
#   tex_file        Path of the .tex file to edit.
#   marker          Regular expression (grep); only its first match is used.
#   additional_row  Character vector of line(s) to insert.
#   where           "after" (default) inserts below the marker line,
#                   "before" inserts above it.
#
# Returns NULL invisibly (the value of writeLines()). Stops with an explicit
# error, before anything is written, when the marker is not found or `where`
# is not one of the two supported values.
add_table_row <- function(tex_file, marker, additional_row, where = "after") {
    where <- match.arg(where, c("after", "before"))

    lines <- readLines(tex_file)
    idx <- grep(marker, lines)[1]
    if (is.na(idx)) {
        stop(
            "add_table_row: no line matches marker '", marker,
            "' in ", tex_file,
            call. = FALSE
        )
    }

    # append() inserts *after* a position, so "before" targets the line above
    insert_after <- if (where == "after") idx else idx - 1L
    lines <- append(lines, additional_row, after = insert_after)
    writeLines(lines, tex_file)
}
# Delete every line matching a marker from a LaTeX table file, rewriting the
# file in place.
#
# Arguments:
#   tex_file  Path of the .tex file to edit.
#   marker    Regular expression (grepl) selecting the line(s) to drop.
#
# Returns NULL invisibly (the value of writeLines()). When nothing matches,
# the file content is left as it was.
remove_table_row <- function(tex_file, marker) {
    lines <- readLines(tex_file)
    # Logical indexing on purpose: lines[-grep(...)] would select *nothing*
    # when there is no match (x[-integer(0)] is empty) and wipe the file.
    keep <- !grepl(marker, lines)
    writeLines(lines[keep], tex_file)
}

# Return the lines of a LaTeX table that sit strictly between the first
# "\midrule" line and the first line containing four spaces followed by a
# backslash.
#
# Arguments:
#   path  Path of the .tex file to read.
#
# Returns a character vector (possibly empty). Stops with an explicit error
# when either delimiter is missing or the closing delimiter does not come
# after the "\midrule" line.
get_estimates_rows <- function(path) {
    x <- read_lines(path)
    mid <- str_which(x, "\\\\midrule")[1]
    bot <- str_which(x, "    \\\\")[1]

    if (is.na(mid)) {
        stop("get_estimates_rows: no \\midrule line in ", path, call. = FALSE)
    }
    if (is.na(bot)) {
        stop(
            "get_estimates_rows: no closing line (four spaces + backslash) in ",
            path,
            call. = FALSE
        )
    }
    if (bot <= mid) {
        stop(
            "get_estimates_rows: closing line (", bot,
            ") does not come after \\midrule (", mid, ") in ", path,
            call. = FALSE
        )
    }

    # seq_len() avoids the descending sequence that (mid + 1):(bot - 1) would
    # produce when the two delimiters are adjacent.
    x[mid + seq_len(bot - mid - 1L)]
}

# Run a Stata command through stata() and flatten each row of the returned
# data into a single "; "-separated string.
#
# Arguments:
#   command  Stata code passed as the first argument of stata().
#   data.in  Data passed to stata() as its `data.in` argument.
#
# Returns one string per row of the data that stata() returns
# (data.out = TRUE).
get_fstat_from_stata <- function(command, data.in) {
    join_row <- function(row) paste(row, collapse = "; ")
    stata_output <- stata(command, data.in = data.in, data.out = TRUE)
    apply(stata_output, 1, join_row)
}

# Rebuild table rows with their cells in a new order.
#
# Each line is split on " & ". The first cell is always kept in front,
# followed by the cells at positions `neworder`; positions that do not exist
# become empty cells. Cells are re-joined with " & " and " \\" is appended.
#
# Arguments:
#   lines     Character vector of table rows.
#   neworder  Positions (within the split row) of the cells to place after
#             the first cell.
#
# Returns the rebuilt rows, one per input line.
collapse_stage1 <- function(lines, neworder) {
    rebuild_row <- function(line) {
        cells <- str_split(line, " & ", simplify = TRUE)
        picked <- c(cells[1], cells[neworder])
        # Out-of-range positions come back as NA; emit them as empty cells
        picked <- replace(picked, is.na(picked), "")
        joined <- paste(picked, collapse = " & ")
        paste0(joined, " \\\\")
    }
    unlist(map(lines, rebuild_row))
}

# Return every line of a file that matches a marker.
#
# Arguments:
#   path    Path of the file to read.
#   marker  Regular expression (grep) selecting the line(s) to return.
#
# Returns the matching lines, in file order (empty when nothing matches).
get_table_row <- function(path, marker) {
    file_lines <- read_lines(path)
    grep(marker, file_lines, value = TRUE)
}

# Replace the first match of a pattern on every line of a file, rewriting the
# file in place.
#
# Arguments:
#   path  Path of the file to edit.
#   old   Pattern handed to str_replace() as-is.
#   new   Replacement text.
#
# Returns NULL invisibly (the value of writeLines()).
edit_table_content <- function(path, old, new) {
    updated <- str_replace(readLines(path), old, new)
    writeLines(updated, path)
}

# Replace the first literal occurrence of a string on every line of a file,
# rewriting the file in place. `old` is wrapped in fixed(), so it is matched
# as plain text rather than as a regular expression.
#
# Arguments:
#   path  Path of the file to edit.
#   old   Text to look for (matched literally).
#   new   Replacement text.
#
# Returns NULL invisibly (the value of writeLines()).
edit_table_content_fixed <- function(path, old, new) {
    current <- readLines(path)
    updated <- str_replace(current, fixed(old), new)
    writeLines(updated, path)
}


# Reduce a LaTeX table file to the lines strictly between its second and
# third "hline" lines. The file at `path` is overwritten with those lines.
#
# Arguments:
#   path  Path of the .tex file to trim in place.
#
# Returns NULL invisibly (the value of writeLines()). Stops with an explicit
# error, before anything is written, when the file has fewer than three lines
# containing "hline".
get_clustering_coefs_rows <- function(path) {
    x <- read_lines(path)
    rule_idx <- str_which(x, "hline")
    if (length(rule_idx) < 3L) {
        stop(
            "get_clustering_coefs_rows: expected at least 3 'hline' lines in ",
            path, ", found ", length(rule_idx),
            call. = FALSE
        )
    }
    mid <- rule_idx[2]
    bot <- rule_idx[3]

    # seq_len() yields an empty selection when the two rules are adjacent;
    # (mid + 1):(bot - 1) would count downwards and keep the rule lines.
    x <- x[mid + seq_len(bot - mid - 1L)]
    writeLines(x, path)
}

# Reduce a LaTeX table file to the span running from its first "toprule" line
# to its first "bottomrule" line, shrunk by `margin` lines at both ends. The
# file at `path` is overwritten with that span.
#
# Arguments:
#   path    Path of the .tex file to trim in place.
#   margin  Number of lines to drop at each end of the span. 0 (default)
#           keeps the two rule lines themselves; 1 drops them; and so on.
#
# Returns NULL invisibly (the value of writeLines()). Stops with an explicit
# error, before anything is written, when either rule is missing. An empty
# span (margin too large) writes an empty file.
get_summary_stats_rows <- function(path, margin = 0) {
    x <- read_lines(path)
    # [1]: with several matches only the first one is meaningful for `:`
    mid <- str_which(x, "toprule")[1]
    bot <- str_which(x, "bottomrule")[1]
    if (is.na(mid) || is.na(bot)) {
        stop(
            "get_summary_stats_rows: 'toprule' and/or 'bottomrule' not found in ",
            path,
            call. = FALSE
        )
    }

    # Clamp to the file so out-of-range positions cannot turn into "NA" lines
    first <- max(1, mid + margin)
    last <- min(length(x), bot - margin)

    # first:last would count downwards once the margins cross
    x <- if (first <= last) x[first:last] else character(0)
    writeLines(x, path)
}

# Format numbers in fixed notation with "," as thousands separator and at
# most three decimals; trailing zeros (and a bare decimal point) are dropped.
#
# Arguments:
#   x  Numeric vector.
#
# Returns a character vector, one formatted string per element of x.
comma3 <- function(x) {
    formatC(x, big.mark = ",", format = "f", digits = 3, drop0trailing = TRUE)
}

# Print a three-column table as a LaTeX tabular (booktabs rules) with the
# fixed header "Patents with & Number & Share (\%)".
#
# Arguments:
#   x     Table-like object indexed as x[i, j]; column 1 is the row label,
#         column 2 is formatted with two decimals and "," as big mark,
#         column 3 with two decimals.
#   file  "" (default) prints to the console; otherwise the path of the file
#         to (over)write, after which a confirmation line is printed.
#   ...   Ignored; accepted so the signature is compatible with print().
#
# Returns x invisibly.
print.xtable_custom <- function(x, file = "", ...) {
    header <- c(
        "\\begin{tabular}{lcr}\n",
        "\\toprule\n",
        "Patents with & Number & Share (\\%) \\\\\n",
        "\\midrule\n"
    )

    # seq_len() so that a table without rows yields no body at all
    # (1:nrow(x) would iterate over c(1, 0) and emit junk rows).
    body <- unlist(lapply(seq_len(nrow(x)), function(i) {
        paste0(
            x[i, 1], " & ",
            format(x[i, 2], nsmall = 2, big.mark = ","), " & ",
            format(x[i, 3], nsmall = 2), " \\\\\n"
        )
    }))

    footer <- c("\\bottomrule\n", "\\end{tabular}\n")

    # cat() opens and closes the target itself, so there is no sink() or
    # connection left dangling if formatting fails half-way.
    cat(header, body, footer, file = file, sep = "")

    if (nzchar(file)) {
        cat("Table saved to:", file, "\n")
    }
    invisible(x)
}

# Draw the county map of residualized surname entropy for one year and save
# it as figureB03<letter>.pdf inside poutputappendix.
#
# Relies on objects defined elsewhere in the session: countyLevel18501940
# (subset with data.table syntax below), cnty1900 and state1900 (drawn with
# geom_sf), years_to_plot, and poutputappendix.
#
# Arguments:
#   year_val  Year to plot. Its position in years_to_plot selects the letter
#             appended to the file name.
#
# Returns the path of the saved PDF.
create_figureB03 <- function(year_val) {
    ## Subset data: rows of the requested year, complete in all three columns
    d <- na.omit(countyLevel18501940[
        year == year_val,
        .(gisjoin_1900, entropy_namelast_mp_adjp, n_sum_namelast_mp_adjp)
    ])

    # Calculate residualized entropy: residual of entropy on log(n), with the
    # mean added back so values stay on the original scale
    d[, entropy_namelast_mp_adjp_r := as.numeric(resid(lm(entropy_namelast_mp_adjp ~
        log(n_sum_namelast_mp_adjp), d))) + mean(entropy_namelast_mp_adjp)]

    # Prepare map data (join keys are inferred from the shared column names)
    dfplot <- left_join(cnty1900, d)

    ## Create plot
    letter_suffix <- letters[which(years_to_plot == year_val)]
    plotname <- file.path(poutputappendix, paste0("figureB03", letter_suffix, ".pdf"))
    p <- ggplot() +
        # Counties, filled by septile of the residualized entropy
        geom_sf(
            data = dfplot, aes(fill = cut_number(entropy_namelast_mp_adjp_r, 7)),
            lwd = 0, colour = NA
        ) +
        scale_fill_brewer(
            palette = "GnBu", na.value = "grey",
            guide = guide_legend("Surname diversity")
        ) +
        # State outlines on top.
        # NOTE(review): recent ggplot2 releases use `linewidth`, not `size`,
        # for line width; confirm the outlines render at the intended width.
        geom_sf(
            data = state1900,
            fill = NA, colour = "black", size = 0.1
        ) +
        # A plot has a single coordinate system. An earlier
        # coord_sf(crs = st_crs(dfplot)) used to precede this one and was
        # always replaced by it (with a ggplot2 message), so only the
        # effective call is kept.
        coord_sf(crs = st_crs(state1900), datum = NA) +
        ggthemes::theme_map() +
        theme(legend.position = "none") +
        ggtitle(paste0("Surname Diversity - ", year_val))

    ggsave(plotname, p, width = 8, height = 4.6)

    return(plotname)
}


# Shared implementation of create_figureB09() and create_figureB10(): draw
# the county map of iv_lo_entropy_namelast_mp_adjp_fe_immig_r_w for one year
# and save it as <figure_id><letter>.pdf inside poutputappendix.
#
# Relies on objects defined elsewhere in the session: `d` (looked up when the
# function is called, not passed in), cnty1900, state1900, years_to_plot and
# poutputappendix.
#
# Arguments:
#   year_val   Year to plot. Its position in years_to_plot selects the letter
#              appended to the file name.
#   figure_id  File-name prefix, e.g. "figureB09".
#
# Returns the path of the saved PDF.
.create_iv_entropy_map <- function(year_val, figure_id) {
    # Prepare map data (join keys are inferred from the shared column names)
    dfplot <- left_join(cnty1900, d %>% filter(year == year_val))

    ## Create plot
    letter_suffix <- letters[which(years_to_plot == year_val)]
    plotname <- file.path(poutputappendix, paste0(figure_id, letter_suffix, ".pdf"))
    p <- ggplot() +
        # Counties, filled by septile of the plotted variable
        geom_sf(
            data = dfplot, aes(fill = cut_number(iv_lo_entropy_namelast_mp_adjp_fe_immig_r_w, 7)),
            lwd = 0, colour = NA
        ) +
        scale_fill_brewer(
            palette = "GnBu", na.value = "grey"
        ) +
        # State outlines on top.
        # NOTE(review): recent ggplot2 releases use `linewidth`, not `size`,
        # for line width; confirm the outlines render at the intended width.
        geom_sf(
            data = state1900,
            fill = NA, colour = "black", size = 0.1
        ) +
        # A plot has a single coordinate system. An earlier
        # coord_sf(crs = st_crs(dfplot)) used to precede this one and was
        # always replaced by it (with a ggplot2 message), so only the
        # effective call is kept.
        coord_sf(crs = st_crs(state1900), datum = NA) +
        ggthemes::theme_map() +
        theme(legend.position = "none")

    ggsave(plotname, p, width = 8, height = 4.6)

    return(plotname)
}

# Save appendix figure B09 for `year_val`; returns the path of the PDF.
create_figureB09 <- function(year_val) {
    .create_iv_entropy_map(year_val, "figureB09")
}

# Save appendix figure B10 for `year_val`; returns the path of the PDF.
# NOTE(review): B10 plots the same column from the same `d` as B09 and only
# the file name differs. Presumably the caller swaps `d` between the two
# calls; confirm this is intended and not a copy-paste leftover.
create_figureB10 <- function(year_val) {
    .create_iv_entropy_map(year_val, "figureB10")
}